/*************************************************************************************** 
Firm-embedded productivity and cross-country income differences
Alviarez, Cravino and Ramondo
Journal of Political Economy (2022)

Program: model_07_selection.do
Date: October 2022

Description: Construct the bins for figure 7 OLS residuals prepared in Matlab

*****************************************************************************************/

*-------------------------------------------------------------------------------
* Session setup.
* typeden is defined before the include and lf after it; neither is referenced
* again in this file (presumably consumed by set_directories.do or by other
* programs of the project -- confirm before changing the order).
global typeden = 1
include "set_directories.do"
* NOTE: set memory is ignored by Stata 12 and later; kept for older versions.
set memory 64g
global lf LF

* Start a fresh log (close any log left open by a previous run).
capture log close
log using "${clogs}/model_07_selection.log", replace
*-------------------------------------------------------------------------------


* Run parameters
*   y       : year kept in every input dataset
*   ctryrel : ISO code of the reference country (presumably; confirm)
*   dd      : number of quantile bins (10 = deciles)
*   lev     : variable that defines the sector level used in the by() groups
local y 2016
local ctryrel FR
local dd 10
local lev sector


*(1) Firm-sector FE (to create deciles based on this)
*-----------------------------------------
* Loads the firm fixed effects for year y, reduces the data to one row per
* distinct (year type hq guo_bvd firm_FE sector) combination and assigns each
* row to one of dd quantile bins of firm_FE within (lev, type, year).
* The result is stored in the tempfile decile_guo_FE.
* NOTE: xtile() is not an official egen function; it is supplied by the
* user-written egenmore package, which must be installed.
clear all
use "${data}/firmFE_naics_sales_s1_base_woparent.dta", clear
keep if year==`y'
keep year type hq guo_bvd firm_FE sector
duplicates drop
egen decile_guo_FE = xtile(firm_FE), by(`lev' type year) n(`dd')
* Quick sanity check of the bin range in the log
sum decile_guo_FE
tempfile decile_guo_FE
save `decile_guo_FE', replace


*(2) Measure by country-sector (by Z)
*--------------------------------------------
* Builds Z = ln_(grvar0) - (-0.2 * D_A) for every country-sector in year y and
* assigns each observation to one of dd quantile bins of Z within
* (type, lev, year). The result is stored in the tempfile decile_Z.
local grvar0 "LP_ppp_emp_pwt"

use "${data}/estimates_sec_naics_sales_s1_base_woparent.dta", clear
drop if sector1==sector
* Estimates based on fewer than 3 affiliates are set to missing
replace DA=. if num_aff<3
replace DP=. if num_aff<3

keep if year==`y'
rename DA D_A
rename DP D_P
* Drop missing estimates, and zero estimates for every country other than the
* reference country (local ctryrel, set in the parameter section)
drop if D_A==. | (D_A==0 & isocode!="`ctryrel'")

* Bring in the aggregate productivity measures; unmatched aggregates are dropped
merge m:1 year isocode sector using "${data}/aggregates_tfp_lp_klems.dta"
drop if _merge==2
drop _merge

gen D_A_phi=(-1)*D_A*(0.2)
gen b_D_A_phi_`grvar0'=ln_`grvar0' - D_A_phi

gen Z=b_D_A_phi_`grvar0'
egen decile_Z = xtile(Z), by(type `lev' year) n(`dd')
* Rank by ascending Z within (type, year); the sort order is stated explicitly
* so that the rank does not depend on a previous sort being left untouched
bysort type year (Z): gen D_Waugh_rank=_n
rename D_A A
keep isocode type `lev' year A D_Waugh_rank R2 R2adj R2w R2wadj decile_Z*
tempfile decile_Z
save `decile_Z', replace


*(3) Measure by country-sector (by popularity, number of foreign affiliates operating in each country-sector pair)
*--------------------------------------------
* Loads the country-sector aggregates, restricts them to the 27 countries listed
* below and to year y, and assigns each observation to one of dd quantile bins
* of ENT_mnc within (type, lev, year). Stored in the tempfile decile_popularity.
use isocode sector1 sector year GO_usd GO_usd_exp *ENT_m* using "${data}/klems_oecd_unido_orbis_sales_emp_exp.dta", clear
drop if isocode==""
* inlist() accepts at most 10 string arguments, hence three calls of nine codes
keep if inlist(isocode,"DK","JP","IT","KR","DE","FR","ES","MX","GB") | ///
        inlist(isocode,"PL","GR","NL","RO","BE","AT","PT","FI","SE") | ///
        inlist(isocode,"CZ","SK","BG","HU","HR","LV","SI","LT","EE")
drop if sector1==sector
keep if year==`y'

* Short label for the broad sector; empty when sector1 is none of the four
gen type = cond(sector1=="Manufacturing (C)", "manuf", ///
           cond(sector1=="Market_Services (G-H-I-J-K-M-N-R-S-T)", "serv", ///
           cond(sector1=="Non-Market Economy", "nonmarket", ///
           cond(sector1=="Other_Goods (A-B-D-E-F)", "others", ""))))
order year isocode sector

gen ENT_mnc2=ENT_mnc
egen decile_numaff = xtile(ENT_mnc2), by(type `lev' year) n(`dd')
* Keep only observations that received a bin
keep if decile_numaff!=.
tempfile decile_popularity
save `decile_popularity', replace



*Bring these deciles to the residuals
*--------------------------------------------
* Attaches the three sets of bins built above to the residual data for year y:
*   decile_numaff (country-sector popularity), decile_Z (country-sector Z) and
*   decile_guo_FE (firm-sector fixed effect).
* Observations present only in a decile file are dropped after each merge.
* The combined data are stored in the tempfile base0.
clear all
use "${data}/residual_naics_sales_s1_base_woparent.dta", clear
keep if year==`y'
* Optional command held in the global whichtypes (not set in this file)
$whichtypes
tab type

* year is part of the key in all three merges so that the match stays correct
* even if the year filters above are changed
merge m:1 isocode type `lev' year using `decile_popularity', keepusing(decile_*)
drop if _merge==2
drop _merge

merge m:1 isocode type `lev' year using `decile_Z', keepusing(decile_*)
drop if _merge==2
drop _merge

merge m:1 type sector  year hq guo_bvd using `decile_guo_FE', keepusing(decile_*)
drop if _merge==2
drop _merge

rename decile_Z dec_Z
rename decile_numaff dec_numaff
rename decile_guo_FE dec_guoFE

tempfile base0
save `base0', replace


*Calculate the mean across all firms within a decile across sectors (Number of firms)
*--------------------------------------------
* For every (firm bin, country-sector bin) pair: reload base0, scale resid by
* the sample standard deviation of lhs2, and collapse to cell means plus a firm
* count. Each result is saved in a tempfile named <firm bin>_<country bin>_s.
foreach firmbin in dec_guoFE {
    foreach ctrybin in dec_Z dec_numaff {
        display "`firmbin' / `ctrybin'"

        use `base0', clear
        * resid_sd is the standard deviation of lhs2 (not of resid)
        egen resid_sd = sd(lhs2)
        gen `firmbin'_`ctrybin' = resid/resid_sd
        * Constant used only to count observations per cell
        gen a = 1
        collapse (mean) lhs2 xb1 xbd1 resid* `firmbin'_`ctrybin' (count) num_firm=a, by(`ctrybin' `firmbin' year)

        * Common key names so the collapsed files can be merged later
        rename `ctrybin' dec_ctrysec
        rename `firmbin' dec_firmsec
        drop if dec_firmsec==. | dec_ctrysec==.

        tempfile `firmbin'_`ctrybin'_s
        save ``firmbin'_`ctrybin'_s', replace
    }
}
*
* Combine the two collapsed files (bins by Z and bins by number of affiliates)
* on the common keys. Variables present in both files keep the values of the
* file in memory (the Z file); the merge only adds the scaled-residual variable
* of the using file.
use `dec_guoFE_dec_Z_s', clear
foreach ff in dec_guoFE {
    foreach xx in dec_numaff {
        display "`ff' / `xx'"
        merge 1:1 dec_firmsec dec_ctrysec using ``ff'_`xx'_s'
        describe
        drop _merge
    }
}

* Strip every "dec_" from the names of the variables that start with it:
*   dec_firmsec -> firmsec, dec_ctrysec -> ctrysec,
*   dec_guoFE_dec_Z -> guoFE_Z, dec_guoFE_dec_numaff -> guoFE_numaff
foreach var of varlist dec_* {
    local newname : subinstr local var "dec_" "", all
    rename `var' `newname'
}
*
save "${data}/selection_tomatlab_type_`dd'_agg_woparent.dta", replace

* Names of the scaled-residual variables to export (also used as tempfile names)
local llb guoFE_Z 
local lld guoFE_numaff 
local ll `llb'  `lld'
display "`ll'"



*------------------------------------------------------------------------------
*Prepare final data for figure 7
*------------------------------------------------------------------------------
* One wide file per variable: rows are country-sector bins (ctrysec), columns
* are the firm-sector bins (firmsec) appended to the variable name.
foreach v of local ll {
    use "${data}/selection_tomatlab_type_`dd'_agg_woparent.dta", clear
    keep ctrysec firmsec `v'
    reshape wide guo*, i(ctrysec) j(firmsec)
    tempfile `v'
    save ``v'', replace
}
*
* Start from the first wide file and merge only the remaining ones, so that no
* file is merged onto itself.
local first : word 1 of `ll'
local rest : list ll - first
clear all
use ``first'', clear
foreach v of local rest {
    merge 1:1 ctrysec using ``v''
    drop _merge
}
export delimited ctrysec guo* using "${data}/selection_tomatlab_type_`dd'_agg_woparent.csv", replace
**Next go to Matlab**

log close 
